package org.zjvis.datascience.common.constant;

/**
 * Constants used by dataset management (file import, type detection and
 * gpload/psql log parsing).
 *
 * <p>This is a pure constant holder: it is final and cannot be instantiated.
 * All values are compile-time constants.
 *
 * @description Dataset management constants
 * @date 2021-12-24
 */
public final class DatasetConstant {

    /**
     * Not instantiable; the class only exposes static constants.
     */
    private DatasetConstant() {
    }

    // ------------------------------------------------------------------
    // Separators and default field names
    // ------------------------------------------------------------------

    /**
     * Default CSV separator.
     */
    public static final String DEFAULT_SEPARATOR = ",";

    /**
     * Default CSV data separator (regex). Matches a comma only when it is
     * followed by an even number of double quotes up to the end of the input,
     * so commas inside a double-quoted section are not treated as separators.
     */
    public static final String DEFAULT_DATA_SEPARATOR = ",(?=([^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)";

    /**
     * Separator used when converting Excel data to CSV. Excel cells may
     * themselves contain commas, so a Tab is used instead.
     */
    public static final String EXCEL_TO_CSV_SEPARATOR = "\t";

    /**
     * Prefix of the default name given to an empty field on CSV import; a
     * number n is appended to it.
     */
    public static final String DEFAULT_FIELD_NAME = "column_";

    /**
     * Default id field name.
     * NOTE(review): presumably the name of a generated per-record id column;
     * confirm against the import code.
     */
    public static final String DEFAULT_ID_FIELD = "_record_id_";

    // ------------------------------------------------------------------
    // File-import data types
    // ------------------------------------------------------------------

    /**
     * File-import data type: integer.
     */
    public static final String DATA_INT = "int";

    /**
     * File-import data type: decimal.
     */
    public static final String DATA_DECIMAL = "decimal";

    /**
     * File-import data type: string.
     */
    public static final String DATA_VARCHAR = "varchar";

    /**
     * File-import data type: date.
     */
    public static final String DATA_DATE = "date";

    /**
     * File-import data type: json.
     */
    public static final String DATA_JSON = "json";

    /**
     * File-import data type: array.
     */
    public static final String DATA_ARRAY = "array";

    /**
     * Maximum length of a numeric type.
     */
    public static final int DATA_NUMBER_MAX_SIZE = 19;

    /**
     * Maximum length of a string.
     */
    public static final int DATA_VARCHAR_MAX_SIZE = 100;

    // ------------------------------------------------------------------
    // Character checks
    // ------------------------------------------------------------------

    /**
     * Illegal characters (regex character class): U+FEFF and U+00A0.
     */
    public static final String ILLEGAL_CHARACTER_REGEX = "[\\uFEFF\\u00A0]";

    /**
     * Character class of the special characters, i.e. {@link #SPECIAL_CHARACTER_REGEX}
     * without its leading and trailing wildcard. Used to replace these
     * special characters.
     */
    public static final String SPECIAL_CHARACTER_REPLACE = "[`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~！@#￥%……&*（）——+|{}【】‘；：”“’。，、？\"]";

    /**
     * Regex for "spaces or special characters": matches a whole single-line
     * string that contains at least one character of
     * {@link #SPECIAL_CHARACTER_REPLACE}.
     *
     * <p>Built from {@link #SPECIAL_CHARACTER_REPLACE} so the two character
     * sets cannot drift apart.
     *
     * <p>NOTE(review): no whitespace character appears to be listed in the
     * character class, although the description and the tip message mention
     * spaces. An earlier, disabled variant used the whitespace and
     * punctuation classes instead. Confirm which behavior is intended.
     */
    public static final String SPECIAL_CHARACTER_REGEX = ".*" + SPECIAL_CHARACTER_REPLACE + ".*";

    /**
     * Tip message shown for special characters. The text tells the user that
     * header fields of the uploaded file contain spaces, punctuation or other
     * special characters, and that these will be specially handled on import.
     */
    public static final String TIPS_SPECIAL_CHARACTER = "检测到您上传的文件做为表头的字段中有空格、标点符号等特殊字符，导入系统后会对这些字符进行特殊处理";

    // ------------------------------------------------------------------
    // Preview / type-detection sizes
    // ------------------------------------------------------------------

    /**
     * Number of rows shown in the data-ingestion preview.
     */
    public static final int DATA_PREVIEW_SIZE = 50;

    /**
     * Number of rows used for data type detection during data ingestion.
     */
    public static final int DATA_TYPE_CHECK_SIZE = 5000;

    /**
     * Number of rows used for data type detection during Excel data ingestion.
     */
    public static final int EXCEL_DATA_TYPE_CHECK_SIZE = 500;

    /**
     * Character class used to decide whether ingested data contains illegal
     * characters. It is the union of the code-unit ranges listed below.
     * U+FEFF is deliberately left out (the fifth range stops at U+FEFE).
     * A range for U+0081 to U+009F ("Specials") used to be listed but is
     * disabled.
     *
     * <p>The lowerCamelCase name is kept as-is because it is part of the
     * public interface.
     */
    public static final String utf8IllegalChar =
            "["
                    + "\\u9FA6-\\uA71F"
                    + "\\uA800-\\uABFF"
                    + "\\uD7B0-\\uFAFF"
                    + "\\uFB50-\\uFE1F"
                    + "\\uFE30-\\uFEFE"     // U+FEFF is not included
                    + "\\uFFF0-\\uFFFF"
                    + "]";

    // ------------------------------------------------------------------
    // gpload / psql log parsing
    // ------------------------------------------------------------------

    /**
     * gpload upload-failure log: regex for the error part. Group 1 is the
     * text between {@code ERROR:} and the next {@code encountered}.
     */
    public static final String GPLOAD_ERROR_REGEX = "ERROR:(.*?)encountered";

    /**
     * Regex for a psql {@code COPY} line.
     * NOTE(review): the capture group is reluctant and nothing follows it, so
     * with {@code Matcher.find()} group 1 is always the empty string; it only
     * captures text when the entire input has to match (e.g.
     * {@code String.matches}). Confirm how callers apply this pattern.
     */
    public static final String PSQL_COPY_REGEX = "COPY (.*?)";

    /**
     * gpload upload-success log. Group 1 is the value after
     * {@code rows Inserted =}, group 2 the value after
     * {@code data formatting errors =}, group 3 the word after {@code gpload}.
     */
    public static final String GPLOAD_STATUS_REGEX = "\\|INFO\\|rows Inserted\\s+= (.*?) .+\\|INFO\\|data formatting errors = (.*?) .+\\|INFO\\|gpload (.*?) ";

    /**
     * gpload upload-failure log: regex for a failed conversion to numeric.
     * The misspelled constant name is kept for compatibility with callers.
     */
    public static final String GPLOAD_ERROR_NUMBERIC_REGEX = "(.*)numeric(.*)";

    /**
     * gpload upload-failure log: regex for a failed conversion to date/time.
     */
    public static final String GPLOAD_ERROR_DATE_TIME_REGEX = "(.*)date/time(.*)";

    /**
     * gpload upload-failure log: regex for "file not found".
     * The misspelled constant name is kept for compatibility with callers.
     */
    public static final String GPLOAD_ERROR_FILE_NOT_FOUNT_REGEX = "(.*)file\\s+not\\s+found(.*)";

    /**
     * Regex matching a single-line message that contains
     * "segment reject limit reached, aborting operation".
     */
    public static final String GPLOAD_REJECT_LIMIT_ERROR = "(.*)segment reject limit reached, aborting operation(.*)";

    /**
     * Regex for the error detail: group 1 is the text between
     * {@code DETAIL:  Last error was: } (two spaces after the colon) and the
     * last following {@code CONTEXT}.
     */
    public static final String GPLOAD_ERROR_DETAIL_REGEX = "DETAIL:  Last error was: (.*)CONTEXT";

}
